R code for lecture 17

slugs <- read.table( 'http://www.bio.ic.ac.uk/research/mjcraw/statcomp/data/slugsurvey.txt', header=TRUE)
slug.table <- table(slugs$slugs, slugs$field)
slugtable <- data.frame(table(slugs$slugs, slugs$field))
# numerical version of count variable
slugtable$Var1.num <- as.numeric(as.character(slugtable$Var1))
 
# single mean Poisson model negative log-likelihood
negpois1.LL<-function(p){
LL<-sum(log(dpois(slugs$slugs, lambda=p)))
-LL
}
 
# separate means Poisson log-likelihood
pois2.LL <- function(p){
mu <- p[1] + p[2]*(slugs$field=="Rookery")
LL <- sum(log(dpois(slugs$slugs, lambda=mu)))
LL
}
 
# negative log-likelihood
negpois2.LL <- function(p){
-pois2.LL(p)
}
 
# obtain estimates using nlm
out.pois2 <- nlm(negpois2.LL, c(1.2,1))
# obtain estimates using glm
out2 <- glm(slugs~field, data=slugs, family=poisson)
 
# add predicted means to data frame
slugtable$mu <- predict(out2, type='response', newdata=data.frame(field=slugtable$Var2))
# calculate Poisson probabilities
slugtable$p <- dpois(slugtable$Var1.num, lambda=slugtable$mu)
# add tail probability to X = 10 category
slugtable$p2 <- dpois(slugtable$Var1.num, lambda=slugtable$mu)+ppois(10, lambda=slugtable$mu, lower.tail=F)*(slugtable$Var1.num==10)
# count the number of observations in each field type
n <- table(slugs$field)
# calculate expected counts
slugtable$exp <- slugtable$p2 * n[as.numeric(slugtable$Var2)]
 
# graph Poisson model and raw data
library(lattice)
xyplot(Freq~Var1.num|Var2, data=slugtable, xlab='Count category', panel=function(x, y, subscripts) {
panel.xyplot(x, y, type='h', lineend=1, col='grey', lwd=10)
panel.points(x, slugtable$exp[subscripts], pch=16, cex=.6, col=1, type='o')
})
 
# split probabilities by field type
slug.p <- split(slugtable$p2, slugtable$Var2)
 
# simulate data for field=nursery
rmultinom(1, size=n[1], prob=slug.p[[1]])
# simulate data for field=rookery
rmultinom(1, size=n[2], prob=slug.p[[2]])
# Obtain simulations from both nursery and rookery
sapply(1:2, function(x) rmultinom(1, size=n[x], prob=slug.p[[x]]))
# convert matrix to a vector and save results
out.obs <- as.vector(sapply(1:2, function(x) rmultinom(1, size=n[x], prob=slug.p[[x]])))
# calculate Pearson statistic using both field results
sum((out.obs-slugtable$exp)^2/slugtable$exp)
 
# write things as a function that can be replicated
myfunc <- function() {
as.vector(sapply(1:2, function(x) rmultinom(1,size=n[x],prob=slug.p[[x]])))->out.obs
sum((out.obs-slugtable$exp)^2/slugtable$exp)
}
 
myfunc()
 
# set random seed
set.seed(23)
# obtain 9999 simulated Pearson statistics
replicate(9999, myfunc())->out.pearson
max(out.pearson)
 
# actual Pearson statistic
actual <- sum((slugtable$Freq-slugtable$exp)^2/slugtable$exp)
actual
 
#obtain p-value
out.pearson.all<-c(out.pearson,actual)
sum(out.pearson.all>=actual)/length(out.pearson.all)
 
# what terms are causing the Pearson statistic to be so large: category X >=10 in nursery
round((slugtable$Freq-slugtable$exp)^2/slugtable$exp,3)
 
# graphing the log-likelihood surface
?persp
g <- expand.grid(b0=seq(0.5,2,.05), b1=seq(0.5,2,.05))
 
# calculate log-likelihood on a grid of possible values
g$z <- apply(g, 1, pois2.LL)
 
# arrange z-coordinates in matrix to match x-y grid of values
z.matrix <- matrix(g$z, nrow=length(seq(0.5,2,.05)))
 
# graph surface
persp(seq(0.5,2,.05), seq(0.5,2,.05), z.matrix, xlab='b0', ylab='b1', zlab='log-likelihood')
# add tick marks
persp(seq(0.5,2,.05), seq(0.5,2,.05), z.matrix, xlab='b0', ylab='b1', zlab='log-likelihood', ticktype="detailed")
# rotate around z-axis (theta) and in vertical plane (phi)
persp(seq(0.5,2,.05), seq(0.5,2,.05), z.matrix, xlab='b0', ylab='b1', zlab='log-likelihood', ticktype="detailed", theta=30, phi=30)
# save surface coordinates
out.persp <- persp(seq(0.5,2,.05), seq(0.5,2,.05), z.matrix, xlab='b0', ylab='b1', zlab='log-likelihood', ticktype="detailed", theta=30, phi=30)
# obtain coordinates of projection of MLE onto graph
trans3d(out.pois2$estimate[1], out.pois2$estimate[2], -out.pois2$minimum, out.persp)
# add maximum likelihood estimate to the graph
points(trans3d(out.pois2$estimate[1], out.pois2$estimate[2], -out.pois2$minimum, out.persp), pch=16, col=2)
 
 
# model 1: single mean negative binomial model
negNB.LL <- function(p) {
mu <- p[1]
theta <- p[2]
LL <- sum(log(dnbinom(slugs$slugs, mu=mu, size=theta)))
-LL
}
negNB.LL(c(2,2))
negNB.LL(c(2,4))
 
# model 2: two means negative binomial model
negNB.LL2 <- function(p) {
mu <- p[1] + p[2]*(slugs$field=='Rookery')
theta <- p[3]
LL <- sum(log(dnbinom(slugs$slugs, mu=mu, size=theta)))
-LL
}
 
negNB.LL2(c(2,1,2))
 
# obtain estimates
out.NB1 <- nlm(negNB.LL, c(2,1))
out.NB1
out.NB2 <- nlm(negNB.LL2, c(2,1,1))
out.NB2
 
# fit model 1 using glm.nb function
library(MASS)
out.glm1 <- glm.nb(slugs~1, data=slugs)
summary(out.glm1)
out.NB1$estimate
exp(coef(out.glm1))
 
# fit model 2 using glm.nb function
out.glm2 <- glm.nb(slugs~field, data=slugs)
summary(out.glm2)
out.NB2$estimate
# nursery mean
exp(coef(out.glm2)[1])
# rookery mean
exp(coef(out.glm2)[1]+coef(out.glm2)[2])
 
# Poisson models: common mean and separate means models
out.glm2a <- glm(slugs~field, data=slugs, family=poisson)
out.glm1a <- glm(slugs~1, data=slugs, family=poisson)
 
# obtain log-likelihoods of models
sapply(list(out.glm1, out.glm2, out.glm1a, out.glm2a), logLik)
# Wald test of field effect
summary(out.glm2)
# likelihood ratio test of field effect
anova(out.glm1, out.glm2, test='Chisq')
R Code for Lecture 17 (Monday, October 22, 2012)